import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Input workbook and sheet holding the raw measurements (adjust to your data).
SOURCE_PATH = "表1.xlsx"
SOURCE_SHEET = "Sheet1"
# Workbook that receives the cleaned table.
CLEANED_PATH = "cleaned_data.xlsx"


def clean_oxide_data(df, required="CaO", imputed="Na2O"):
    """Drop rows lacking ``required`` and mean-fill the gaps in ``imputed``.

    Args:
        df: Input DataFrame; it is not modified.
        required: Column whose missing values cause the row to be dropped.
        imputed: Column whose missing values are replaced by the column mean,
            computed after the rows lacking ``required`` have been removed.

    Returns:
        A new DataFrame with the surviving rows and ``imputed`` filled.

    Raises:
        KeyError: If ``required`` or ``imputed`` is not a column of ``df``.
    """
    absent = [col for col in (required, imputed) if col not in df.columns]
    if absent:
        raise KeyError(f"missing expected column(s): {absent}")

    cleaned = df.dropna(subset=[required]).copy()
    # The mean skips NaN, so it reflects only the observed values that remain.
    fill_value = cleaned[imputed].mean()
    return cleaned.assign(**{imputed: cleaned[imputed].fillna(fill_value)})


def plot_cao_vs_na2o(df_cleaned):
    """Show a CaO-vs-Na2O scatter plot with a red regression line overlaid."""
    plt.figure(figsize=(8, 6))
    sns.scatterplot(x=df_cleaned['CaO'], y=df_cleaned['Na2O'], alpha=0.7)

    # Draw only the fitted line; the points were already drawn above.
    sns.regplot(x=df_cleaned['CaO'], y=df_cleaned['Na2O'], scatter=False, color='red')

    # Chinese title and axis labels, rendered with the SimHei font.
    plt.title("CaO 与 Na2O 的散点图", fontproperties="SimHei")
    plt.xlabel("CaO 含量", fontproperties="SimHei")
    plt.ylabel("Na2O 含量", fontproperties="SimHei")

    plt.show()


def main():
    """Read the raw workbook, clean it, save the result, and plot it."""
    df = pd.read_excel(SOURCE_PATH, sheet_name=SOURCE_SHEET)
    df_cleaned = clean_oxide_data(df)

    # Persist the cleaned table; the in-memory frame is plotted directly
    # rather than being read back from disk.
    df_cleaned.to_excel(CLEANED_PATH, index=False)

    plot_cao_vs_na2o(df_cleaned)


if __name__ == "__main__":
    main()
